# -*- coding:utf8 -*-
# !/usr/bin/env python

"""
#全国企业信用信息公示系统（重庆）
"""

import sys
import traceback
import time
import re
from scpy.logger import get_logger
import requests
from bs4 import BeautifulSoup
from utils import kill_captcha
import copy
import json
import cq_trans_dict as TR
import sd_template_dict as TE
import sd_format as FO

# Python 2 only: reloading ``sys`` makes ``sys.setdefaultencoding`` reachable
# again, so UTF-8 can be installed as the default codec for implicit
# str <-> unicode conversions in this process.
reload(sys)
sys.setdefaultencoding('utf8')

# Module-wide logger, created from this file's path.
logger = get_logger(__file__)

# Browser-like User-Agent header; the request sessions in this module use it as
# their only header.
UserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36"


def download_captcha_kill(companyName):
    """
    Solve one captcha and run one keyword search on the registry site.

    A single session is used for both the captcha download and the search
    request, and it is always closed before returning.

    :param companyName: company name or registration number to search for
    :return:
        None  - the site reports that the keyword has no search result;
        ''    - the captcha was empty, could not be solved or was rejected by
                the site; the caller is expected to call again;
        tuple - the four values captured from the first search hit, in order:
                data-id, data-type, data-entId and the link text.
    :raises ValueError: companyName is empty
    :raises Exception: the captcha download failed, the captcha solver raised,
        the search request failed or timed out, or the result page does not
        contain the expected link markup
    """
    if not companyName:
        raise ValueError("input error!")

    # One timeout (seconds) for every request, so a stalled server cannot
    # block the caller forever.
    timeout = 200
    img_url = r'http://gsxt.cqgs.gov.cn/sc.action?width=130&height=40&fs=23'
    check_url = r'http://gsxt.cqgs.gov.cn/search.action'

    img_req = requests.session()
    img_req.headers = {'User-Agent': UserAgent, }
    try:
        try:
            captcha = img_req.get(img_url, timeout=timeout).content
        except Exception as e:
            logger.error("从网站下载验证码失败！重复下载！")
            logger.error(e)
            raise Exception("download captcha error")
        if not captcha:
            logger.error("从网站下载验证码为空！重复下载！")
            return ''

        try:
            res_code = kill_captcha(captcha, 'cq', 'png')
        except Exception as e:
            logger.error("破解验证码的服务出现异常")
            logger.error(e)
            # Bare raise keeps the solver's original traceback.
            raise
        if not res_code or len(res_code) > 100 or str(res_code) in ['None', 'wrong']:
            logger.info('验证码为:%s' % res_code)
            logger.error("破解验证码的服务出现异常,可能是下载的验证码错误，也可能破解服务出现异常！")
            return ''  # empty string tells the caller to retry

        check_data = {
            'key': companyName,
            'code': res_code,
        }
        check_res = img_req.post(url=check_url, data=check_data, timeout=timeout).content
    finally:
        img_req.close()

    if re.findall("您搜索的条件无查询结果", check_res, re.S):
        logger.info("搜索的公司不存在！输入的关键字为:%s" % companyName)
        return None

    if re.findall("验证码不正确", check_res, re.S):
        logger.info("验证码错误！")
        return ''

    com_info = re.findall(
        '''<a class='name' target="_self" data-id="(.*?)" data-type="(.*?)" data-entId="(.*?)">(.*?)</a>''', check_res,
        re.S)
    if not com_info:
        raise Exception("网站可能发生变化！")
    # Only the first hit of the result page is used.
    return com_info[0]


def get_company_info(com_info):
    """
    Download the raw registry payloads for one search hit.

    :param com_info: sequence whose first four items are the data-id,
        data-type, data-entId and company name of a search hit (the value
        returned by download_captcha_kill)
    :return: dict with the keys province, type, html, yearList, keyword,
        companyName and json. Only "json" (raw basic-information response)
        and "yearList" (list of {"year", "report"} dicts holding the raw
        annual-report responses) are filled in here.
    :raises Exception: com_info is empty, or the company page no longer
        exposes the numeric ``type`` value the follow-up requests need
    """
    if not com_info:
        raise Exception("com_list 错误")
    raw_dict = {
        "province": "cq",
        "type": "2",
        "html": "",
        "yearList": [],
        "keyword": "",
        "companyName": "",
        "json": "",
    }
    data_id = com_info[0]
    data_type = com_info[1]
    data_entid = com_info[2]
    company_name = com_info[3]

    # One timeout (seconds) for every request, so a stalled server cannot
    # block the caller forever.
    timeout = 200
    type_url = r'http://gsxt.cqgs.gov.cn/search_ent'
    com_url = r'http://gsxt.cqgs.gov.cn/search_getEnt.action?'
    com_year_list_url = 'http://gsxt.cqgs.gov.cn/search_getYearReport.action'
    year_url = 'http://gsxt.cqgs.gov.cn/search_getYearReportDetail.action'

    com_req = requests.session()
    com_req.headers = {'User-Agent': UserAgent, }
    try:
        com_type_res = com_req.post(url=type_url, data={
            'id': data_id,
            'type': data_type,
            'name': company_name,
            'entId': data_entid, }, timeout=timeout).content

        # The follow-up requests use the numeric type found in the page's
        # ng-init attribute instead of the data-type of the search hit.
        type_expr = re.search(r'.*ng-init=.*type=(.*);name=.*', com_type_res)
        type_digits = re.search(r'\d+', type_expr.group(1)) if type_expr else None
        if not type_digits:
            raise Exception("网站可能发生变化！无法解析企业类型(type)")
        data_type = type_digits.group(0)

        # Basic information
        raw_dict["json"] = com_req.get(url=com_url, params={
            'id': data_id,
            'type': data_type,
            'entId': data_entid,
        }, timeout=timeout).content

        # Annual reports: fetch the list of years, then one detail per year.
        com_year_list = com_req.get(url=com_year_list_url, params={
            'id': data_id,
            'type': data_type,
        }, timeout=timeout).content
        year_list = []
        if com_year_list and len(com_year_list) > 7:
            # The first 6 characters are skipped before JSON decoding
            # (presumably a non-JSON prefix sent by the site - TODO confirm).
            # ``or []`` also covers an explicit JSON null for "history".
            year_history = json.loads(com_year_list[6:]).get("history", []) or []
            for item in year_history:
                year = str(item.get('year', ''))
                com_year_report = com_req.get(url=year_url, params={
                    'id': data_id,
                    'type': data_type,
                    'year': year,
                }, timeout=timeout).content
                year_list.append({"year": year, "report": com_year_report})
    finally:
        com_req.close()

    raw_dict["yearList"] = year_list
    return raw_dict


def extract_base_info(raw_dict):
    """
    Turn the raw basic-information payload into the normalised company dict.

    :param raw_dict: dict produced by get_company_info; only its "json" entry
        is read here
    :return: None when raw_dict is empty or its "json" entry is missing or
        shorter than 7 characters; otherwise a copy of TE.void_base_dict
        filled with the converted sections and passed through FO.clean_all
    """
    if not raw_dict:
        return None
    result = copy.deepcopy(TE.void_base_dict)

    payload = raw_dict.get("json")
    if not payload or len(payload) < 7:
        return None

    # The first 6 characters are skipped before JSON decoding
    # (presumably a non-JSON prefix sent by the site - TODO confirm).
    sections = json.loads(payload[6:])

    # Basic information
    basic_rows = []
    base_section = sections.get('base', [])
    if base_section:
        if 'opscotype' in base_section:
            base_section['operateScope'] = base_section['opscotype']
        # Currency falls back to RMB when the site does not provide one.
        base_section.setdefault('regcapcur', '人民币')
        basic_rows.append(FO.transform_dict(TE.basic_dict, TR.basic_dict, base_section))

    # Shareholder information
    holder_rows = []
    for investor in sections.get('investors', []):
        row = FO.transform_dict(TE.shareHolder_dict, TR.shareHolder_dict, investor)
        # conDate is the greatest non-empty "accondate" among the entries.
        latest = ''
        for payment in investor.get("gInvaccon", []):
            paid_on = payment.get("accondate", "")
            if paid_on > latest and paid_on:
                latest = paid_on
        row["conDate"] = latest
        holder_rows.append(row)

    # Key management personnel
    person_rows = [FO.transform_dict(TE.person_dict, TR.person_dict, member)
                   for member in sections.get('members', [])]

    # Change history
    alter_rows = [FO.transform_dict(TE.alter_dict, TR.alter_dict, change)
                  for change in sections.get('alters', [])]

    # Spot-check / inspection records
    check_rows = [FO.transform_dict(TE.checkMessage_dict, TR.checkMessage_dict, check)
                  for check in sections.get('ccjc', [])]

    # Abnormal-operation records
    abnormal_rows = [FO.transform_dict(TE.abnormalOperation_dict, TR.abnormalOperation_dict, record)
                     for record in sections.get('qyjy', [])]

    filled = (
        ('basicList', basic_rows),
        ('shareHolderList', holder_rows),
        ('alterList', alter_rows),
        ('checkMessage', check_rows),
        ('personList', person_rows),
        ('abnormalOperation', abnormal_rows),
        ('province', 'cq'),
    )
    for key, value in filled:
        result[key] = value

    return FO.clean_all(result)


def extract_year_info(raw_dict):
    """
    Convert the downloaded annual reports into normalised year-report dicts.

    :param raw_dict: dict produced by get_company_info; only its "yearList"
        entry (a list of {"year", "report"} dicts) is read here
    :return: None when raw_dict is empty; otherwise a list holding one
        cleaned dict per report whose payload decodes to a non-empty JSON
        value (reports shorter than 7 characters are skipped)
    """
    if not raw_dict:
        return None
    downloaded = raw_dict.get("yearList", [])
    if not downloaded:
        return []

    state_labels = (('1', '开业'), ('2', '歇业'), ('3', '清算'))
    reports = []
    for entry in downloaded:
        payload = entry.get("report", '')
        year = entry.get("year", '')
        parsed = json.loads(payload) if len(payload) >= 7 else {}
        if not parsed:
            continue

        result = copy.deepcopy(TE.void_year_dict)

        # Basic information: replace the site's flag codes with display text.
        baseInfo = {}
        base_section = parsed.get("base", {})
        if base_section:
            base_section['haswebsite'] = '是' if base_section.get('haswebsite', "") == '1' else '否'
            base_section['istransfer'] = '是' if base_section.get('istransfer', "") == '1' else '否'
            base_section.setdefault('empnum', '企业选择不公示')
            # NOTE(review): unlike the two flags above, anything other than
            # '0' - including a missing value - becomes "yes" here; confirm
            # that this is intended.
            base_section['hasbrothers'] = '否' if base_section.get('hasbrothers', "") == '0' else '是'
            # Unknown operating-state codes are left untouched.
            state = base_section.get('opstate', "")
            for code, label in state_labels:
                if state == code:
                    base_section['opstate'] = label
                    break
            baseInfo = FO.transform_dict(TE.baseInfo_dict, TR.baseInfo_dict, base_section)

        # Sponsors and their capital contributions
        investorInformations = []
        for investor in parsed.get("mNGsentinv", []):
            merged = FO.transform_dict(TE.investorInformations_dict, TR.investorInformations_dict, investor)

            paid_in = investor.get("mNGsentinvaccon")
            paid_in_row = {}
            if paid_in:
                paid_in_row = FO.transform_dict(TE.investorInformations_dict, TR.investorInformations_dict,
                                                paid_in)

            subscribed = investor.get("mNGsentinvsubcon")
            subscribed_row = {}
            if subscribed:
                subscribed_row = FO.transform_dict(TE.investorInformations_dict, TR.investorInformations_dict,
                                                   subscribed)

            # Merge: non-empty values from the two nested records override
            # the investor-level values, the second record winning.
            for extra in (paid_in_row, subscribed_row):
                for field, value in extra.items():
                    if value:
                        merged[field] = value

            investorInformations.append(merged)

        # Website / online shop: only the first entry is kept.
        website = {}
        web_sites = parsed.get("webSites", [])
        if web_sites:
            website = FO.transform_dict(TE.website_dict, TR.website_dict, web_sites[0])

        # Asset status
        assetsInfo = {}
        assets = parsed.get("means", {})
        if assets:
            assetsInfo = FO.transform_dict(TE.assetsInfo_dict, TR.assetsInfo_dict, assets)

        # Equity changes
        equityChangeInformations = [
            FO.transform_dict(TE.equityChangeInformations_dict, TR.equityChangeInformations_dict, stock)
            for stock in parsed.get("stocks", [])]

        # Modification records
        changeRecords = [
            FO.transform_dict(TE.changeRecords_dict, TR.changeRecords_dict, modification)
            for modification in parsed.get("modifies", {})]

        filled = (
            ('year', year),
            ('website', website),
            ('baseInfo', baseInfo),
            ('investorInformations', investorInformations),
            ('assetsInfo', assetsInfo),
            ('equityChangeInformations', equityChangeInformations),
            ('changeRecords', changeRecords),
        )
        for key, value in filled:
            result[key] = value

        reports.append(FO.clean_all(result))

    return reports


def search2(companyName, MAXTIME=40):
    """
    Search a company, retrying the captcha step, and return raw + parsed data.

    :param companyName: company name or registration number to search for
    :param MAXTIME: maximum number of captcha attempts
    :return: None when the site reports that the company does not exist;
        otherwise a tuple ``(raw_dict, asic_dict, gate_method)`` where
        raw_dict is the output of get_company_info, asic_dict is the parsed
        company dict (None if parsing failed) and gate_method holds the
        search hit so that search3 can re-download without a captcha
    :raises Exception: every captcha attempt was rejected, or
        download_captcha_kill / get_company_info raised
    """
    com_list = ''
    for attempt in range(MAXTIME):
        if attempt:
            logger.error("重复破解验证码!当前设定重复破解次数为:%s, 剩余次数为:%s " % (MAXTIME, MAXTIME - attempt))
        try:
            com_list = download_captcha_kill(companyName)
        except Exception:
            traceback.print_exc()
            # Bare raise keeps the original traceback.
            raise
        if com_list is None:  # company does not exist
            return None
        if com_list != '':  # got a search hit, stop retrying
            break
    else:
        # Reached only when no attempt produced a hit (or MAXTIME <= 0).
        raise Exception("多次破解验证码错误,当前设置次数为：%s" % MAXTIME)

    raw_dict = get_company_info(com_list)
    gate_method = {
        'url': 'http://gsxt.cqgs.gov.cn/',
        'method': 'post',
        'province': 'cq',
        'companyName': companyName,
        'data': com_list,
    }
    try:
        asic_dict = extract_base_info(raw_dict)
        year_list = extract_year_info(raw_dict)
        enterprise_name = asic_dict['basicList'][0].get('enterpriseName', '')
        asic_dict['yearReportList'] = year_list
    except Exception as e:
        # Best effort: parsing problems still return the raw download, with
        # the search keyword standing in for the company name.
        logger.info(e)
        raw_dict['companyName'] = companyName
        return raw_dict, None, gate_method

    raw_dict['companyName'] = enterprise_name
    gate_method['companyName'] = enterprise_name
    return raw_dict, asic_dict, gate_method


def search(companyName):
    """
    Search a company and return only its parsed information.

    :param companyName: company name or registration number to search for
    :return: the second element of the search2 result, or None when search2
        returned an empty result
    """
    outcome = search2(companyName)
    return outcome[1] if outcome else None


def search3(gate_method):
    """
    Re-download and parse a company from a previously stored search hit.

    No captcha is needed: the hit stored under ``gate_method['data']`` is
    passed straight to get_company_info.

    :param gate_method: dict as returned by search2 / search3; "data" must
        hold the search hit, "companyName" is used as the fallback name
    :return: tuple ``(raw_dict, asic_dict, gate_method)`` where raw_dict is
        the output of get_company_info, asic_dict is the parsed company dict
        (None if parsing failed) and gate_method is a freshly built gate dict
    :raises Exception: gate_method is empty or has no "data" key, or
        get_company_info raised
    """
    if not gate_method or 'data' not in gate_method:
        raise Exception("gate_method error, doesn't have `data` key")
    com_list = gate_method.get('data')
    raw_dict = get_company_info(com_list)
    fallback_name = gate_method.get('companyName', '')

    new_gate = {
        'url': 'http://gsxt.cqgs.gov.cn/',
        'method': 'post',
        'province': 'cq',
        'companyName': fallback_name,
        'data': com_list,
    }
    try:
        asic_dict = extract_base_info(raw_dict)
        year_list = extract_year_info(raw_dict)
        enterprise_name = asic_dict['basicList'][0].get('enterpriseName', '')
        asic_dict['yearReportList'] = year_list
    except Exception as e:
        # Best effort: parsing problems still return the raw download, with
        # the stored name standing in for the company name.
        logger.info(e)
        raw_dict['companyName'] = fallback_name
        return raw_dict, None, new_gate

    raw_dict['companyName'] = enterprise_name
    new_gate['companyName'] = enterprise_name
    return raw_dict, asic_dict, new_gate


if __name__ == "__main__":
    # Manual smoke test: search one company and dump the result twice, first
    # ASCII-escaped on one line, then pretty-printed with the original text.
    #
    # Sample keywords for manual runs.
    #
    # Invalid keywords:
    #   dota
    #   重庆
    #   重庆饭店
    #   公司
    #
    # Companies flagged as abnormal:
    #   重庆长安工业（集团）有限责任公司长安宾馆
    #   重庆轴承工业公司汽车轴承厂
    #   重庆昇才投资咨询有限公司
    #   长征电器公司重庆公司
    #
    # Normal companies:
    #   重庆长安工业（集团）有限责任公司
    #   重庆猪八戒网络有限公司
    #
    # Revoked:
    #   重庆方德信息科技有限公司
    #
    # Other samples:
    #   重庆力帆摩托车制造有限公司
    #   重庆巴南钟厚玉诊所
    #   重庆掌宝科技
    #   重庆发电厂
    #   重庆大学(重庆)美视电影学院管理有限公司
    #   重庆市酿造调味品公司；重庆酿造调味品总厂
    #   重庆东方轮船公司
    #   旭硕科技(重庆)有限公司
    #   重庆同厦置业顾问有限公司
    #   重庆万仓房地产营销策划有限公司
    #   重庆市合川区唐智生猪养殖场
    #   奥特斯科技(重庆)有限公司
    #   500000500030401
    #   重庆金易房地产开发（集团）有限公司
    #   重庆市黔江区小莫副食批发部
    #   重庆市黔江区海雅水蛭养殖场
    #   重庆市黔江区晨耕花椒种植专业合作社
    #   重庆市黔江区荣帆塑钢门窗安装中心
    #   重庆视觉色装饰有限公司南坪分公司
    #   重庆渝开发物资实业公司
    #   重庆渝开发股份有限公司
    #   重庆中天玻璃钢制品有限公司
    #   重庆民本农业发展有限公司
    #   重庆渝宁化肥有限公司
    #   重庆市金牛线缆有限公司歌乐山分公司
    #   重庆渝首电子有限公司
    #   重庆市永川区陈善友水稻种植家庭农场
    #   重庆帝赛泓商贸有限公司
    #   重庆市朗晖物流有限公司
    outcome = search2('重庆市朗晖物流有限公司')
    print(json.dumps(outcome))
    print(json.dumps(outcome, ensure_ascii=False, indent=4))
